# Prepare VT returns
# Note: clean and rearrange 2020 VT returns
# By Dom Valentino and Chris Kenny
# libs --------------------------------------------------------------------
library(tidyverse) # select(), filter(), mutate(), etc.
library(readxl) # read_excel()

# president ---------------------------------------------------------------
# Town-level presidential returns: keep the town name, the Trump/Pence and
# Biden/Harris vote columns, and the total-votes column.
vt <- read_excel(
  "../data/returns_vt/Federal_US PRESIDENT AND VICE PRESIDENT_11_03_2020 - GENERAL ELECTION_20201129T171359.xlsx"
) %>%
  select(
    Town,
    `DONALD J. TRUMP and MICHAEL R. PENCE`,
    `JOSEPH R. BIDEN and KAMALA D. HARRIS`,
    `Total Votes`
  )

# Hand-coded town-to-county lookup; town names are upper-cased and trimmed
# before being used as the join key.
countytown <- read_excel("../data/returns_vt/Vermont_County_Town.xlsx") %>%
  mutate(Town = str_trim(str_to_upper(Town)))

# Attach each town's county, then give the columns short names by position.
vt_m <- left_join(vt, countytown, by = "Town")
names(vt_m) <- c("Town", "Trump", "Biden", "Total Votes", "County")

# Collapse to one row per county: total ballots cast and Biden's share of the
# combined Trump + Biden vote.
out <- vt_m %>%
  mutate(
    Trump = as.numeric(Trump),
    Biden = as.numeric(Biden),
    `Total Votes` = as.numeric(`Total Votes`)
  ) %>%
  group_by(County) %>%
  dplyr::summarize(
    Trump = sum(Trump),
    Biden = sum(Biden),
    ballots_cast = sum(`Total Votes`)
  ) %>%
  ungroup() %>%
  transmute(County, ballots_cast, dem_share_pres = Biden / (Trump + Biden))

# governor ----------------------------------------------------------------
# Town-level gubernatorial returns: keep the town name, the Scott and
# Zuckerman vote columns, and the total-votes column.
vt <- read_excel(
  "../data/returns_vt/StateWide_GOVERNOR_11_03_2020 - GENERAL ELECTION_20201130T131630.xlsx"
) %>%
  select(Town, `PHIL SCOTT`, `DAVID ZUCKERMAN`, `Total Votes`)

# Hand-coded town-to-county lookup; town names are upper-cased and trimmed
# before being used as the join key.
countytown <- read_excel("../data/returns_vt/Vermont_County_Town.xlsx") %>%
  mutate(Town = str_trim(str_to_upper(Town)))

# Attach each town's county, then give the columns short names by position.
vt_m <- left_join(vt, countytown, by = "Town")
names(vt_m) <- c("Town", "Scott", "Zuckerman", "Total Votes", "County")

# Collapse to one row per county: total ballots cast and Zuckerman's share of
# the combined Scott + Zuckerman vote.
out_gov <- vt_m %>%
  mutate(
    Scott = as.numeric(Scott),
    Zuckerman = as.numeric(Zuckerman),
    `Total Votes` = as.numeric(`Total Votes`)
  ) %>%
  group_by(County) %>%
  dplyr::summarize(
    Scott = sum(Scott),
    Zuckerman = sum(Zuckerman),
    ballots_cast = sum(`Total Votes`)
  ) %>%
  ungroup() %>%
  transmute(County, ballots_cast, dem_share_gov = Zuckerman / (Scott + Zuckerman))

# merge -------------------------------------------------------------------
# County CVAP data (Stata .dta). read_dta() belongs to haven, which is
# installed with the tidyverse but is NOT attached by library(tidyverse), so it
# is called with an explicit namespace here.
cvap <- haven::read_dta("../data/modified data/county_cvap.dta")

# Combine the presidential and gubernatorial county summaries.
# The join key is County only: joining on ballots_cast as well would silently
# leave dem_share_gov as NA for any county whose total-vote figure differs
# between the two contests. ballots_cast is taken from the presidential file.
final_vt <- out %>%
  left_join(select(out_gov, County, dem_share_gov), by = "County") %>%
  transmute(
    county = County,
    fips = NA_real_,
    treat = 1,
    dem_share_pres = dem_share_pres,
    dem_share_sen = NA_real_,
    dem_share_gov = dem_share_gov,
    ballots_cast = ballots_cast,
    year = 2020
  ) %>%
  # Attach the 2020 Vermont CVAP rows; both tables carry `year`, so the join
  # produces year.x / year.y, which are collapsed back to a single `year`.
  left_join(cvap[cvap$year == 2020 & cvap$state == "VT", ], by = "county") %>%
  mutate(year = year.x, turnout_share = ballots_cast / cvap_approx) %>%
  select(-year.y, -year.x)

# save file
# The output location is passed positionally: the `path` argument name was
# deprecated in readr 1.4.0 in favour of `file`, and the positional form works
# with both old and new readr.
write_csv(final_vt, "../data/analysis/analysis_vt.csv")
